#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@创建时间    : 2022/12/7  17:57
@作者  : st
@文件名: data_process_medical_1207.py
@项目名: PyCharm
@文件描述:

"""
from process.data_process_medical_1118 import get_medical_tag_datas
from process.data_process_nlpcc import get_to_text
from utils.common_utils import cut_sent_seg

# Data source for the statistics below. To run the same report on the other
# imported loader instead, swap in:
#   text_list, res_list = get_to_text()
# NOTE(review): presumably text_list holds raw text strings and res_list holds,
# per sample, the collection of tagged new words -- confirm against the loaders.
text_list, res_list = get_medical_tag_datas()

print('--------------------')
# Number of samples.
print('数据数：', len(text_list))

# Total length summed over all samples (character count if samples are str).
count = sum(map(len, text_list))
print('总字数：', count)

# Total number of segments produced by cut_sent_seg across all samples.
count = sum(len(cut_sent_seg(sample)) for sample in text_list)
print('总句子：', count)

# Total number of entries across all per-sample result collections.
word_num = sum(map(len, res_list))
print('新词数：', word_num)

